# Notebook setup: start the run timer, load plotting packages, and define the
# shared ggplot theme and colour palette used by every chart.
start_time <- Sys.time()

suppressPackageStartupMessages(library(ggpath))
suppressPackageStartupMessages(library(plotly))
suppressPackageStartupMessages(library(tidyverse))

# Raise the penalty for scientific notation so large counts print in full.
options(scipen = 10L)

# Shared theme: bold white text on a purple background. An earlier plain
# definition of jam_theme was immediately overwritten by this one, so only
# the effective definition is kept.
jam_theme <- theme_minimal() +
  theme(
    axis.title.y = element_text(margin = margin(t = 0, r = 8, b = 0, l = 0)),
    axis.title.x = element_text(margin = margin(t = 0, r = 0, b = 8, l = 0)),
    text = element_text(size = 20, face = "bold", color = "white"),
    axis.text.x = element_text(size = 10, color = "white"),
    axis.text.y = element_text(size = 13, color = "white"),
    plot.title = element_text(face = "bold", color = "white"),
    plot.background = element_rect(fill = "#5E61AF"),
    plot.margin = margin(1, 1, 1.5, 1.2, "cm")
  )

# Create color palette based on lineup image @ https://coolors.co/image-picker
my_cols <- c(
  "#52BFEC", "#AA1880", "#EC0059", "#08BCDF", "#4C1064", "#FF00BC",
  "#2249CD", "#53007D", "#FF6B02", "#B319B2", "#EAE100", "#BF068F"
)
Summary
This notebook shows how I searched the Spotify and last.fm APIs to find data on EDC 2024 artists. I was curious to find the most popular artists.
# Artist names were extracted from the EDC lineup PNG with
# https://www.imagetotext.io/ and saved as a tab-separated text file.
# Read that file, drop duplicate rows, and sort by artist name.
edc_artists <- read_tsv(
  "C:/Users/joseph.mcgirr/Personal/R_fun/spotify/edc_2024_artists_text_from_png.txt"
) |>
  unique() |>
  arrange(artist)
# edc_artists
# Top 30 artists by Spotify follower count, drawn without artist images.
plot.top.artists(
  column_name = "followers",
  top = 30,
  plot_title = "Top 30 EDC artists with the most followers on Spotify",
  include_images = "false"
)
Spotify “Popularity”
The “popularity” metric is “track-based and a measure of how many plays a track received and how recent those plays are. An artist’s popularity is calculated from the popularity of all the artist’s tracks.”
Code
# Top 10 artists by Spotify popularity score, drawn with artist images.
plot.top.artists(
  column_name = "popularity",
  top = 10,
  plot_title = "Top 10 most popular EDC artists according to Spotify",
  include_images = "true"
)
Code
# Top 30 artists by Spotify popularity score, drawn without artist images.
plot.top.artists(
  column_name = "popularity",
  top = 30,
  plot_title = "Top 30 most popular EDC artists according to Spotify",
  include_images = "false"
)
last.fm Global Listeners
Code
# Top 10 artists by last.fm global listener count, drawn with artist images.
plot.top.artists(
  column_name = "global_listeners",
  top = 10,
  plot_title = "Top 10 artists with the most listeners on last.fm",
  include_images = "true"
)
Code
# Top 30 artists by last.fm global listener count, drawn without artist images.
plot.top.artists(
  column_name = "global_listeners",
  top = 30,
  plot_title = "Top 30 artists with the most listeners on last.fm",
  include_images = "false"
)
---
title: "EDC 2024"
author: "Joe"
date: "`r Sys.Date()`"
editor: source
format:
  html:
    theme: quartz
    #fontcolor: black
    toc: true
    toc-location: left
    code-fold: true
    code-tools: true
    df-print: kable
    fig-width: 10
    fig-height: 7.5
    embed-resources: true
    grid:
      body-width: 2000px
execute:
  warning: false
---

```{r}
# Notebook setup: start the run timer, load plotting packages, and define the
# shared ggplot theme and colour palette used by every chart.
start_time <- Sys.time()

suppressPackageStartupMessages(library(ggpath))
suppressPackageStartupMessages(library(plotly))
suppressPackageStartupMessages(library(tidyverse))

# Raise the penalty for scientific notation so large counts print in full.
options(scipen = 10L)

# Single place to change where the input/output text files live.
data_dir <- "C:/Users/joseph.mcgirr/Personal/R_fun/spotify"

# Shared theme: bold white text on a purple background. An earlier plain
# definition of jam_theme was immediately overwritten by this one, so only
# the effective definition is kept.
jam_theme <- theme_minimal() +
  theme(
    axis.title.y = element_text(margin = margin(t = 0, r = 8, b = 0, l = 0)),
    axis.title.x = element_text(margin = margin(t = 0, r = 0, b = 8, l = 0)),
    text = element_text(size = 20, face = "bold", color = "white"),
    axis.text.x = element_text(size = 10, color = "white"),
    axis.text.y = element_text(size = 13, color = "white"),
    plot.title = element_text(face = "bold", color = "white"),
    plot.background = element_rect(fill = "#5E61AF"),
    plot.margin = margin(1, 1, 1.5, 1.2, "cm")
  )

# Create color palette based on lineup image @ https://coolors.co/image-picker
my_cols <- c(
  "#52BFEC", "#AA1880", "#EC0059", "#08BCDF", "#4C1064", "#FF00BC",
  "#2249CD", "#53007D", "#FF6B02", "#B319B2", "#EAE100", "#BF068F"
)
```

# Summary

This notebook shows how I searched the Spotify and last.fm APIs to find data on EDC 2024 artists. I was curious to find the most popular artists.

# EDC 2024 Lineup

# Convert lineup image to text

Create text list of EDC artists using [imagetotext.io](https://www.imagetotext.io/)

```{r}
# https://www.imagetotext.io/ to get artist names from edc artist lineup PNG
# Read the extracted names, drop duplicate rows, and sort by artist name.
edc_artists <- read_tsv(
  file.path(data_dir, "edc_2024_artists_text_from_png.txt")
) |>
  unique() |>
  arrange(artist)
# edc_artists
```

# Collect Artist Data

## Spotify

Access Spotify API using package [spotifyr](https://www.rdocumentation.org/packages/spotifyr/versions/2.2.4)

You need to set up a Dev account with Spotify to access their Web API [here](https://developer.spotify.com/dashboard/create). It is very quick and easy.

```{r}
#| include: false
# Credentials must never be written into the notebook. Define
# SPOTIFY_CLIENT_ID and SPOTIFY_CLIENT_SECRET in the environment
# (for example in ~/.Renviron) before running the Spotify chunk below.
spotify_env <- Sys.getenv(c("SPOTIFY_CLIENT_ID", "SPOTIFY_CLIENT_SECRET"))
if (!all(nzchar(spotify_env))) {
  message(
    "SPOTIFY_CLIENT_ID / SPOTIFY_CLIENT_SECRET are not set; ",
    "the Spotify API chunk cannot be run until they are."
  )
}
```

```{r}
#| eval: false
# install.packages("spotifyr")
library(spotifyr)

# set up a dev account to get valid API IDs
# Sys.setenv(SPOTIFY_CLIENT_ID = '########################')
# Sys.setenv(SPOTIFY_CLIENT_SECRET = '#########################')
access_token <- get_spotify_access_token()

n_artists <- nrow(edc_artists)

# Use search_spotify() to find spotify artist ids from artist names
# No ID for Domina, Hint of Lavender, Marlie, VUIIIGUR
spotify_artist_id <- rep(NA_character_, n_artists)
for (i in seq_len(n_artists)) {
  artist_name <- edc_artists$artist[i]
  artist_hits <- search_spotify(artist_name)$artists$items

  # Lineup names are compared against upper-cased Spotify names; match()
  # gives the position of the first exact hit, or NA when there is none.
  exact_artist_name_match <- match(artist_name, toupper(artist_hits$name))

  if (!is.na(exact_artist_name_match)) {
    spotify_artist_id[i] <- artist_hits$id[exact_artist_name_match]
  } else {
    cat(paste0("\nNo exact match for artist name: ", artist_name, "\n\n"))
    # Alternative kept for reference: fall back to the first search result.
    # spotify_artist_id[i] <- artist_hits$id[1]
    # cat(paste0("\nNo exact match for artist name: ", artist_name,
    #            "\n", "Using: ", artist_hits$name[1],
    #            "\n", artist_hits$external_urls.spotify[1], "\n\n"))
  }
}
edc_artists$spotify_artist_id <- spotify_artist_id

# Use get_artist() to get genres, followers, and popularity.
# Everything starts as NA so artists without a Spotify id stay missing.
genres <- rep(NA_character_, n_artists)
followers <- rep(NA_real_, n_artists)
popularity <- rep(NA_real_, n_artists)
image_url <- rep(NA_character_, n_artists)

for (i in which(!is.na(edc_artists$spotify_artist_id))) {
  i_artist_info <- get_artist(edc_artists$spotify_artist_id[i])

  # An artist with no genres collapses to "", which is recorded as NA.
  genre_string <- paste0(i_artist_info$genres, collapse = ",")
  if (nzchar(genre_string)) {
    genres[i] <- genre_string
  }

  followers[i] <- i_artist_info$followers$total
  popularity[i] <- i_artist_info$popularity

  # Artists without images have no url column, so the lookup yields NULL.
  first_image <- i_artist_info$images$url[1]
  if (!is.null(first_image)) {
    image_url[i] <- first_image
  }
}

edc_artists$genres <- genres
edc_artists$followers <- followers
edc_artists$popularity <- popularity
edc_artists$image_url <- image_url
```

## last.fm

Access last.fm API using package [lastfmR](https://github.com/ppatrzyk/lastfmR)

```{r}
#| eval: false
# devtools::install_github("ppatrzyk/lastfmR")
library(lastfmR) # masks get_tracks()

lastfm_artist_info <- get_artist_info(artist_vector = edc_artists$artist) |>
  tibble()

# No `by` is given, so the join uses every column name the two tables share.
edc_artists <- full_join(edc_artists, lastfm_artist_info)

# write.table(edc_artists, file.path(data_dir, "edc_2024_artist_data.txt"),
#             row.names = FALSE, quote = FALSE, sep = "\t")
```

# Spotify Followers

```{r}
#| fig-width: 15
#| fig-height: 9
edc_artists <- read_tsv(file.path(data_dir, "edc_2024_artist_data.txt"))

#' Lollipop-style chart of the top artists for one metric
#'
#' @param column_name Name (string) of the numeric column to rank and plot.
#' @param top Number of highest-ranked artists to keep.
#' @param plot_title Title shown above the chart.
#' @param include_images "true" (or TRUE) draws each artist's image, taken
#'   from the `image_url` column, at the end of its bar; anything else
#'   leaves images off.
#' @param artist_data Data frame with an `artist` column, the column named by
#'   `column_name`, and `image_url` when images are requested. Defaults to
#'   the notebook-level `edc_artists`.
#' @return A ggplot object.
plot.top.artists <- function(column_name, top, plot_title,
                             include_images = "false",
                             artist_data = edc_artists) {
  stopifnot(
    is.character(column_name),
    length(column_name) == 1L,
    column_name %in% names(artist_data)
  )
  show_images <- identical(tolower(as.character(include_images)), "true")

  # Keep `artist` itself untouched (it drives the colour mapping) and carry
  # the metric-ordered factor for the axis in a separate column.
  plot_data <- artist_data |>
    arrange(desc(.data[[column_name]])) |>
    head(top) |>
    mutate(artist_ordered = reorder(artist, .data[[column_name]]))

  # Recycle the palette so there are always enough colours for every bar.
  bar_colors <- rep_len(my_cols, max(length(my_cols), nrow(plot_data)))

  p <- ggplot(plot_data, aes(x = artist_ordered, y = .data[[column_name]])) +
    geom_segment(
      aes(
        xend = artist_ordered,
        y = 0,
        yend = .data[[column_name]],
        color = artist
      ),
      linewidth = 3
    ) +
    # clip = "off" lets images at the bar ends extend past the panel edge.
    coord_flip(clip = "off") +
    scale_color_manual(values = bar_colors) +
    jam_theme +
    theme(
      axis.title.x = element_blank(),
      axis.title.y = element_blank(),
      legend.position = "none"
    ) +
    ggtitle(plot_title)

  if (show_images) {
    p <- p + geom_from_path(aes(path = image_url), width = 0.052)
  }
  p
}

plot.top.artists("followers", 10, "Top 10 EDC artists with the most followers on Spotify", include_images = "true")
plot.top.artists("followers", 30, "Top 30 EDC artists with the most followers on Spotify", include_images = "false")
```

# Spotify "Popularity"

The "popularity" metric is "track-based and a measure of how many plays a track received and how recent those plays are. An artist’s popularity is calculated from the popularity of all the artist’s tracks."

```{r}
#| fig-width: 15
#| fig-height: 9
plot.top.artists("popularity", 10, "Top 10 most popular EDC artists according to Spotify", include_images = "true")
plot.top.artists("popularity", 30, "Top 30 most popular EDC artists according to Spotify", include_images = "false")
```

# last.fm Global Listeners

```{r}
#| fig-width: 15
#| fig-height: 9
plot.top.artists("global_listeners", 10, "Top 10 artists with the most listeners on last.fm", include_images = "true")
plot.top.artists("global_listeners", 30, "Top 30 artists with the most listeners on last.fm", include_images = "false")
```

# Genres

```{r}
#| fig-width: 15
#| fig-height: 9
# filter(edc_artists, !is.na(genres)) |> nrow()
# n_distinct(edc_artists$genres)

# One row per (artist, genre) mention: split the comma-separated genre
# strings, drop missing values, trim whitespace, and upper-case.
all_genres <- tibble(
  genre = edc_artists$genres |>
    str_split(",") |>
    unlist() |>
    na.omit() |>
    as.character() |>
    str_trim() |>
    toupper()
)

# Count how often each genre occurs, most frequent first.
count_genres <- function(all_genres) {
  all_genres |>
    group_by(genre) |>
    summarise(n_genres = dplyr::n()) |>
    arrange(desc(n_genres))
}

#' Lollipop-style chart of the 30 most frequent genres
#'
#' @param all_genres Data frame with one `genre` value per row.
#' @param column_name Name (string) of the count column to plot; the counts
#'   computed here are stored in `n_genres`.
#' @param plot_title Title shown above the chart.
#' @return A ggplot object.
plot.top.genres <- function(all_genres, column_name, plot_title) {
  plot_data <- count_genres(all_genres) |>
    head(30) |>
    mutate(genre_ordered = reorder(genre, .data[[column_name]]))

  ggplot(plot_data, aes(x = genre_ordered, y = .data[[column_name]])) +
    geom_segment(
      aes(
        xend = genre_ordered,
        y = 0,
        yend = .data[[column_name]],
        color = genre
      ),
      linewidth = 3
    ) +
    coord_flip(clip = "off") +
    scale_color_manual(values = rep(my_cols, 100)) +
    jam_theme +
    theme(
      axis.title.x = element_blank(),
      axis.title.y = element_blank(),
      legend.position = "none"
    ) +
    ggtitle(plot_title)
}

plot.top.genres(all_genres, "n_genres", "Top 30 most represented Spotify genres")

# # filter(edc_artists, !is.na(artist_tags)) |> nrow()
# # n_distinct(edc_artists$artist_tags)
# all_genres <- unlist(str_split(edc_artists$artist_tags, ";")) |>
#   na.omit() |>
#   as.character() |>
#   str_trim() |>
#   toupper() |>
#   tibble()
# names(all_genres) <- "genre"
#
# plot.top.genres(all_genres, "n_genres", "Top 30 most represented last.fm artist tags")
```

## Popular artists in each genre

```{r}
#| fig-width: 15
#| fig-height: 9
# Genres that appear at least 10 times across the lineup.
top_genres <- count_genres(all_genres) |>
  filter(n_genres >= 10) |>
  pull(genre)

for (top_genre in top_genres) {
  # Substring match on the literal genre name (fixed(), not a regex), so a
  # genre such as "HOUSE" also picks up artists tagged "TECH HOUSE".
  genre_artists <- filter(
    edc_artists,
    str_detect(toupper(genres), fixed(top_genre))
  )

  p1 <- plot.top.artists(
    "popularity",
    10,
    paste0("Top 10 ", top_genre, " artists by popularity"),
    include_images = "true",
    artist_data = genre_artists
  )
  # Plots created inside a loop are not auto-printed.
  print(p1)
}
```

# Notes

## Run time

```{r}
Sys.time() - start_time
```

## Session

```{r}
sessionInfo()
```